Part 1: PCA With Penguins
# Principal component analysis (PCA) of the penguin size measurements.
penguin_pca <- penguins %>%
  # Keep body mass plus every column whose name ends in "_mm".
  select(body_mass_g, ends_with("_mm")) %>%
  # Listwise deletion: drop any row with an NA in the retained columns.
  drop_na() %>%
  # Standardize each column (centre on its mean, divide by its standard
  # deviation) so variables in different units contribute comparably.
  scale() %>%
  # Run the PCA on the standardized matrix.
  prcomp()

# Loadings matrix: one row per input variable, one column per principal
# component (PC1, PC2, ...).
penguin_pca[["rotation"]]
## PC1 PC2 PC3 PC4
## body_mass_g 0.5483502 0.084362920 -0.5966001 -0.5798821
## bill_length_mm 0.4552503 0.597031143 0.6443012 -0.1455231
## bill_depth_mm -0.4003347 0.797766572 -0.4184272 0.1679860
## flipper_length_mm 0.5760133 0.002282201 -0.2320840 0.7837987
# Full penguins data (every column kept), minus the rows with an NA in body
# mass or in any column whose name ends in "mm". Only those columns are
# checked for missing values.
# NOTE(review): presumably meant to line up row-for-row with the observations
# used to fit penguin_pca -- confirm the two selectors match the same columns.
penguin_complete <- drop_na(penguins, body_mass_g, ends_with("mm"))
# PCA biplot: autoplot() builds a ggplot suited to the class of the object it
# is given (here, the prcomp result).
autoplot(
  penguin_pca,
  data = penguin_complete,  # Supplies the non-PCA columns, e.g. species.
  colour = "species",       # Colour the observations by species.
  loadings = TRUE,          # Draw an arrow for each variable's loadings.
  loadings.label = TRUE     # Label each arrow with its variable name.
) +
  theme_minimal()
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

Part 2: ‘ggplot2’ Customization and Reading in Different File Types
Read in an .xlsx file and do some wrangling
# Import the landings workbook and tidy it for plotting.
fish_noaa <- here("data", "foss_landings.xlsx") %>%
  read_excel() %>%
  # Standardize the column names.
  clean_names() %>%
  # Lower-case the contents of every character column.
  mutate(across(where(is.character), tolower)) %>%
  # Keep each name only up to its fourth-from-last character, i.e. drop the
  # final three characters.
  # NOTE(review): presumably strips a fixed trailing marker -- confirm
  # against the raw data.
  mutate(nmfs_name = str_sub(nmfs_name, end = -4)) %>%
  # Retain only the records flagged as public.
  filter(confidentiality == "public")
Make a customized graph
# Build the line chart and keep it in an object for reuse below.
fish_plot <- fish_noaa %>%
  ggplot(aes(x = year, y = pounds)) +
  # One coloured line per species name. The legend is suppressed because,
  # with this many species, it crowds out the plot itself.
  geom_line(aes(color = nmfs_name), show.legend = FALSE) +
  theme_minimal()

# Print the stored graph.
fish_plot
## Warning: Removed 6 row(s) containing missing values (geom_path).

# Convert the stored ggplot into an interactive plotly graph.
fish_plot %>%
  ggplotly()
# One line per species, with only the "tunas" series highlighted.
# This predicate is evaluated row by row; the rendered output shows
# gghighlight warning that its grouped calculation failed and falling back to
# an ungrouped filter.
fish_noaa %>%
  ggplot(aes(x = year, y = pounds, group = nmfs_name)) +
  geom_line() +
  theme_minimal() +
  gghighlight(nmfs_name == "tunas")
## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...
## label_key: nmfs_name
## Warning: Removed 6 row(s) containing missing values (geom_path).

# One coloured line per species; highlight every series with at least one
# year above 1e8 pounds.
ggplot(
  data = fish_noaa,
  mapping = aes(x = year, y = pounds, group = nmfs_name)
) +
  geom_line(aes(colour = nmfs_name)) +
  theme_minimal() +
  # The predicate is evaluated once per group. `any(..., na.rm = TRUE)` is
  # used instead of `max(pounds) > 1e8` because max() returns NA for a series
  # containing a missing `pounds` value, which would silently leave that
  # series unhighlighted even when it exceeds the threshold. It also returns
  # FALSE, without a warning, for a series whose values are all missing.
  gghighlight(any(pounds > 1e8, na.rm = TRUE))
## label_key: nmfs_name
## Warning: Removed 6 row(s) containing missing values (geom_path).

Read in data from a URL, use ‘lubridate’ and ‘mutate()’, and make a graph with months in logical order
# Read the daily electricity CSV straight from its URL, then standardize the
# column names (e.g. total_kWh becomes total_k_wh).
monroe_wt <- "https://data.bloomington.in.gov/dataset/2c81cfe3-62c2-46ed-8fcf-83c1880301d1/resource/13c8f7aa-af51-4008-80a9-56415c7c931e/download/mwtpdailyelectricitybclear.csv" %>%
  read_csv() %>%
  clean_names()
## Parsed with column specification:
## cols(
## date = col_character(),
## kWh1 = col_double(),
## kW1 = col_double(),
## kWh2 = col_double(),
## kW2 = col_double(),
## solar_kWh = col_double(),
## total_kWh = col_double(),
## MG = col_double()
## )
# Add date-derived columns. Arguments to a single mutate() are evaluated in
# order, so each one can use the columns created before it.
monroe_ts <- monroe_wt %>%
  mutate(
    # Parse the month/day/year text (read in as character) into a Date.
    date = mdy(date),
    # Month number of each record.
    record_month = month(date),
    # Month abbreviation as a factor whose levels are ordered by month
    # number, so plots run Jan-Dec rather than alphabetically.
    month_name = fct_reorder(month.abb[record_month], record_month)
  )
# Jittered scatter of total electricity use by month; the x-axis follows the
# factor level order of month_name.
monroe_ts %>%
  ggplot(aes(x = month_name, y = total_k_wh)) +
  geom_jitter()
